When the Python Spatial Analysis Library, PySAL, was originally planned, the intention was to focus on the computational aspects of exploratory spatial data analysis and spatial econometric methods, while relying on existing GIS packages and visualization libraries for visualization of computations. Indeed, we have partnered with esri and QGIS towards this end.
However, over time we have received many requests for supporting basic geovisualization within PySAL so that the step of having to interoperate with an external package can be avoided, thereby increasing the efficiency of the spatial analytical workflow.
In this notebook, we demonstrate several approaches towards a particular subset of geovisualization methods, namely choropleth maps. We start with an exploratory workflow introducing mapclassify and geopandas to create different choropleth classifications and maps for quick exploratory data analysis. We then introduce the geoviews package for interactive mapping in support of exploratory spatial data analysis.
import mapclassify
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as cx
shp_link = 'data/scag_region.gpkg'
gdf = gpd.read_file(shp_link)
gdf = gdf.to_crs(3857)
f, ax = plt.subplots(1, figsize=(12, 8))
gdf.plot(column='median_home_value', scheme='QUANTILES', ax=ax,
edgecolor='white', legend=True, linewidth=0.3)
ax.set_axis_off()
plt.show()
As a first cut, geopandas makes it very easy to plot a map quickly. If you know the area well, this may do fine for quick exploration. If you don't know a place extremely well (or you want to make a figure easy to understand for those who don't) it's often a good idea to add a basemap for context. We can do that easily using the contextily package
gdf.fillna(gdf.mean(), inplace = True)
/srv/conda/envs/notebook/lib/python3.7/site-packages/ipykernel_launcher.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. """Entry point for launching an IPython kernel.
f, ax = plt.subplots(1, figsize=(12, 8))
gdf.plot(column='median_home_value', scheme='QUANTILES', alpha=0.6, ax=ax, legend=True)
cx.add_basemap(ax, crs=gdf.crs.to_string(), source=cx.providers.Stamen.TonerLite)
ax.set_axis_off()
plt.show()
hv = gdf['median_home_value']
mapclassify.Quantiles(hv, k=5)
Quantiles
Interval Count
--------------------------------
[ 10888.42, 276462.96] | 916
( 276462.96, 368936.70] | 917
( 368936.70, 457037.70] | 993
( 457037.70, 628216.01] | 840
( 628216.01, 1088952.40] | 914
mapclassify.Quantiles(hv, k=10)
Quantiles
Interval Count
--------------------------------
[ 10888.42, 203688.34] | 458
( 203688.34, 276462.96] | 458
( 276462.96, 325454.88] | 458
( 325454.88, 368936.70] | 459
( 368936.70, 406233.29] | 457
( 406233.29, 457037.70] | 536
( 457037.70, 512928.74] | 380
( 512928.74, 628216.01] | 460
( 628216.01, 778142.83] | 456
( 778142.83, 1088952.40] | 458
q10 = mapclassify.Quantiles(hv, k=10)
dir(q10)
['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_classify', '_fmt', '_set_bins', '_summary', '_table_string', '_update', 'adcm', 'bins', 'classes', 'counts', 'find_bin', 'fmt', 'gadf', 'get_adcm', 'get_fmt', 'get_gadf', 'get_legend_classes', 'get_tss', 'k', 'make', 'name', 'plot', 'set_fmt', 'table', 'tss', 'update', 'y', 'yb']
q10.bins
array([ 203688.34269663, 276462.9588015 , 325454.87827715,
368936.70411985, 406233.28651685, 457037.69973266,
512928.73595506, 628216.01123596, 778142.82771536,
1088952.39981273])
q10.counts
array([458, 458, 458, 459, 457, 536, 380, 460, 456, 458])
fj10 = mapclassify.FisherJenks(hv, k=10)
fj10
FisherJenks
Interval Count
--------------------------------
[ 10888.42, 191982.12] | 428
( 191982.12, 278662.64] | 510
( 278662.64, 348355.52] | 663
( 348355.52, 417721.72] | 800
( 417721.72, 492314.89] | 679
( 492314.89, 580955.52] | 394
( 580955.52, 680594.57] | 392
( 680594.57, 804626.12] | 297
( 804626.12, 975482.58] | 184
( 975482.58, 1088952.40] | 233
fj10.adcm
98231639.16239837
q10.adcm
127632537.27184954
bins = [100000, 500000, 1000000, 1500000]
ud4 = mapclassify.UserDefined(hv, bins=bins)
ud4
UserDefined
Interval Count
--------------------------------
[ 10888.42, 100000.00] | 61
( 100000.00, 500000.00] | 3054
( 500000.00, 1000000.00] | 1251
(1000000.00, 1500000.00] | 214
f, ax = plt.subplots(1, figsize=(12, 8))
gdf.plot(column='median_home_value', scheme='QUANTILES', ax=ax, alpha=0.6, legend=True)
cx.add_basemap(ax, crs=gdf.crs.to_string(), source=cx.providers.Stamen.TonerLite)
ax.set_axis_off()
plt.show()
f, ax = plt.subplots(1, figsize=(12, 8))
gdf.plot(column='median_home_value', scheme='FisherJenks', ax=ax,
alpha=0.6, legend=True)
cx.add_basemap(ax, crs=gdf.crs.to_string(), source=cx.providers.Stamen.TonerLite)
ax.set_axis_off()
plt.show()
f, ax = plt.subplots(1, figsize=(12, 8))
gdf.plot(column='median_home_value', scheme='FisherJenks', ax=ax,
alpha=0.6, legend=True, cmap='Blues')
cx.add_basemap(ax, crs=gdf.crs.to_string(), source=cx.providers.Stamen.TonerLite)
ax.set_axis_off()
plt.show()
f, ax = plt.subplots(1, figsize=(12, 8))
gdf.plot(column='median_home_value', scheme='FisherJenks', ax=ax,
alpha=0.6, legend=True, cmap='Blues')
cx.add_basemap(ax, crs=gdf.crs.to_string(), source=cx.providers.Stamen.TonerLite)
ax.set_axis_off()
plt.show()
f, ax = plt.subplots(1, figsize=(12, 8))
gdf.plot(column='median_home_value', scheme='FisherJenks', ax=ax,
alpha=0.6, legend=True, cmap='Blues')
cx.add_basemap(ax, crs=gdf.crs.to_string(), source=cx.providers.Stamen.TonerLite)
ax.set_axis_off()
plt.show()
gdf.geoid
0 06037128702
1 06037131600
2 06037134104
3 06037134304
4 06037242000
...
4575 06037920029
4576 06037542000
4577 06111004304
4578 06065044804
4579 06037124400
Name: geoid, Length: 4580, dtype: object
import numpy
counties = set([geoid[:5] for geoid in gdf.geoid])
counties
{'06025', '06037', '06059', '06065', '06071', '06073', '06111'}
for county in counties:
cgdf = gdf[gdf['geoid'].str.match(f'^{county}')]
f, ax = plt.subplots(1, figsize=(12, 8))
cgdf.plot(column='median_home_value', scheme='FisherJenks', ax=ax,
edgecolor='grey', legend=True, cmap='Blues', alpha=0.6)
plt.title(county)
ax.set_axis_off()
plt.show()
import geoviews as gv
import geoviews.feature as gf
import xarray as xr
from cartopy import crs
import hvplot.pandas
import geopandas as gpd
cities = gpd.read_file(gpd.datasets.get_path('naturalearth_cities'))
cities.hvplot(global_extent=True, frame_height=450, tiles=True)
shp_link = 'data/scag_region.gpkg'
gdf = gpd.read_file(shp_link)
gdf.fillna(gdf.mean(), inplace = True)
/srv/conda/envs/notebook/lib/python3.7/site-packages/ipykernel_launcher.py:3: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. This is separate from the ipykernel package so we can avoid doing imports until
gdf.hvplot()
gdf.hvplot(geo=True)
def choro(df, field, scheme='Quantiles', ncolors=5, cmap='Blues',
width=600, height=400, tools=['hover']):
from holoviews.plotting.util import process_cmap
cl = getattr(mapclassify, scheme)
yb = cl(df[field], k=ncolors).yb
pcmap = process_cmap(cmap, ncolors=ncolors)
#yb, pcmap = choro(values, scheme=scheme, ncolors=ncolors, cmap=cmap)
df[scheme] = yb
return gv.Polygons(df, vdims=[scheme, field]).opts(cmap=pcmap, width=width, height=height, tools=tools, line_width=0.1)
choro(gdf, 'median_home_value')
choro(gdf, 'median_home_value', ncolors=9, cmap='Greens')
(gv.tile_sources.CartoLight * choro(gdf, 'median_home_value', scheme='FisherJenks', ncolors=9, cmap='Greens'))
# %load solutions/01.py

<span
xmlns:dct="http://purl.org/dc/terms/" property="dct:title">Geovisualization</span> by <a xmlns:cc="http://creativecommons.org/ns#"
href="http://sergerey.org" property="cc:attributionName"
rel="cc:attributionURL">Serge Rey</a> is licensed under a Creative
Commons Attribution-NonCommercial-ShareAlike 4.0 International License.